library(grf)
library(foreign)
library(haven)
library(rio)
library(party)
library(mltools)
library(mlr)
library(data.table)
library(dplyr)
library(labelled)
library(caTools)
library(xgboost)
library(caret)
library(tree)
library(datasets)
require(haven)
library(Matrix)
library(Seurat)
library(gtools)
library(binsreg)

# NOTE: the global environment is deliberately NOT wiped with rm(list = ls()).
# Clearing the workspace from inside a script destroys whatever the caller had
# loaded when the file is source()d, and it does not reset attached packages
# or options, so it gives no real clean-session guarantee. Run this script in
# a fresh R session instead.

### train ###
# Load the training data from its Stata file.
train <- rio::import("J:/workdata/706727/Temp/ImputeEmp.dta")

# Feature matrix: every column except pnr/year/month (which the script later
# writes out next to the test predictions) and the outcome grademp.
# select() is namespace-qualified because many packages are attached above and
# any of them masking `select` would silently break this line.
X <- as.matrix(dplyr::select(train, -pnr, -year, -month, -grademp))

# Outcome vector; used below as the classification target and xgboost label.
Y <- train$grademp

# Tuning #
# Random-search hyperparameter tuning of an xgboost classifier through mlr.

# Probability-predicting xgboost learner. Its hyperparameter list is assigned
# directly, replacing whatever par.vals the learner was created with.
lrn <- makeLearner("classif.xgboost", predict.type = "prob")
lrn$par.vals <- list(
  objective = "binary:logistic",
  eval_metric = "auc",
  nthread = 16,
  early_stopping_rounds = 5,
  nrounds = 20L,
  eta = 0.5
)

# Search space from which the random search draws candidate settings.
gridparams <- makeParamSet(
  makeIntegerParam("max_depth", lower = 1L, upper = 10L),
  makeNumericParam("subsample", lower = 0.25, upper = 1),
  makeNumericParam("colsample_bytree", lower = 0.25, upper = 1),
  makeNumericParam("gamma", lower = 0, upper = 10)
)

# Classification task: the feature columns plus the outcome as factor "Y".
dataframetrain <- as.data.frame(X)
dataframetrain[["Y"]] <- as.factor(Y)
traintask <- makeClassifTask(data = dataframetrain, target = "Y")

# 25 random candidates, each evaluated with 2-fold cross-validation on AUC.
rdesc <- makeResampleDesc("CV", iters = 2L)
ctrl <- makeTuneControlRandom(maxit = 25L)
mytune <- tuneParams(
  learner = lrn,
  task = traintask,
  resampling = rdesc,
  measures = auc,
  par.set = gridparams,
  control = ctrl,
  show.info = TRUE
)

# Parameters for the final model: the fixed settings first, followed by the
# tuned values returned by the random search (mytune$x).
xgb_params <- c(
  list(
    booster = "gbtree",
    eta = 0.2,
    nthread = 16,
    objective = "binary:logistic",
    eval_metric = "auc"
  ),
  mytune$x
)

# Training data in xgboost's own matrix format, with the outcome as label.
xgb_train <- xgb.DMatrix(data = X, label = Y)

# Number of rounds #
# 5-fold cross-validation over at most 1000 boosting rounds with an
# early-stopping patience of 5 rounds; the best iteration it reports is used
# as the number of rounds for the final fit.
xgbcv <- xgb.cv(
  params = xgb_params,
  data = xgb_train,
  nrounds = 1000,
  nfold = 5,
  early_stopping_rounds = 5
)
numrounds <- xgbcv[["best_iteration"]]

# Fit the final model on the full training data.
model <- xgb.train(
  params = xgb_params,
  data = xgb_train,
  nrounds = numrounds,
  verbose = 1
)

### test ###
# Score the test data with the fitted model and write 0/1 predictions to disk.
test <- rio::import("J:/workdata/706727/Temp/ImputeEmp_Test.dta")

# Build the test feature matrix from exactly the training feature columns, in
# training order. Selecting by name (rather than "everything except the key
# columns") keeps the matrix aligned with the model even if the test file
# carries extra columns (e.g. the outcome) or a different column order, and
# fails with a clear message when a training feature is absent.
feature_names <- colnames(X)
missing_features <- setdiff(feature_names, names(test))
if (length(missing_features) > 0) {
  stop(
    "Test data is missing training feature(s): ",
    paste(missing_features, collapse = ", "),
    call. = FALSE
  )
}
Xtest <- as.matrix(as.data.frame(test)[, feature_names, drop = FALSE])

# The model was trained with objective "binary:logistic", so predict() yields
# probabilities. Xtest is already a matrix, and predict() for an xgboost
# booster has no `response` argument, so neither a second as.matrix() nor
# `response` is passed.
pred <- predict(model, Xtest)

# Classify as 1 when the predicted probability exceeds 0.5, otherwise 0
# (numeric 0/1; NA predictions stay NA).
emp <- as.numeric(pred > 0.5)

# Attach the predictions to the pnr/year/month columns and export them.
emppreds <- cbind(dplyr::select(test, pnr, year, month), emp = emp)

rio::export(emppreds, "J:/workdata/706727/Temp/ImputeEmp_Preds.dta")

# Feature importance of the fitted model, kept in `imp` for inspection.
imp <- xgb.importance(model = model)


